package com.lezai.test;

import org.apache.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * One-shot scraper: downloads a public-information listing page, extracts the
 * title, link and date of every listed entry, and writes them to an Excel file.
 */
public class Scrapy {
    /** Listing endpoint that returns the HTML fragment to scrape. */
    private static final String LIST_URL = "http://wjw.ah.gov.cn/site/label/8888?IsAjax=1&dataType=html&_=0.5073046934752028&labelName=publicInfoList&siteId=6788021&pageSize=920&pageIndex=1&action=list&isDate=true&dateFormat=yyyy-MM-dd&length=50&organId=7001&type=4&catId=6714211&cId=&result=%E6%9A%82%E6%97%A0%E7%9B%B8%E5%85%B3%E4%BF%A1%E6%81%AF&file=%2Fwjw%2FpublicInfoList_wjw&year=";

    /** Destination of the generated spreadsheet. */
    private static final String OUTPUT_PATH = "/Users/yangle/Desktop/temp.xlsx";

    /** Index of the "vitem" cell (within one row) whose first {@code <p>} is read as the date. */
    private static final int DATE_CELL_INDEX = 3;

    /**
     * Fetches the listing page, echoes the raw HTML to stdout and exports the parsed rows.
     *
     * @param args unused
     * @throws Exception if the request fails or the response carries no body
     */
    public static void main(String[] args) throws Exception {
        HttpResponse httpResponse = HttpUtils.doGet(LIST_URL, "", "", null, null);
        // getEntity() is null for responses without a body; fail with a clear message
        // instead of a NullPointerException.
        if (httpResponse.getEntity() == null) {
            throw new IOException("No response body returned for " + LIST_URL);
        }
        String html = convertStreamToString(httpResponse.getEntity().getContent());
        System.out.println(html);
        parseHtml(html);
    }

    /**
     * Parses the listing HTML and writes one spreadsheet row (title, url, date) per entry.
     * Rows that lack the expected date cell, title node or link are skipped with a
     * warning on stderr so that a single odd row does not abort the whole export.
     *
     * @param html HTML of the listing page
     */
    private static void parseHtml(String html) {
        Document document = Jsoup.parse(html);
        // Every listed entry is rendered as an element with class "xxgk_navli".
        Elements postItems = document.getElementsByClass("xxgk_navli");
        System.out.println(postItems.size());

        // Raw Map element type is kept because ExcelUtils.writeExcel's parameter type
        // is not visible here.
        List<Map> dats = new ArrayList<>();
        for (Element element : postItems) {
            Elements cells = element.getElementsByClass("vitem");
            Elements titleNodes = element.getElementsByClass("nr");
            if (cells.size() <= DATE_CELL_INDEX || titleNodes.isEmpty()) {
                System.err.println("Skipping row without the expected date/title cells");
                continue;
            }

            Elements dateParagraphs = cells.get(DATE_CELL_INDEX).getElementsByTag("p");
            Element titleNode = titleNodes.get(0);
            Elements links = titleNode.getElementsByTag("a");
            if (dateParagraphs.isEmpty() || links.isEmpty()) {
                System.err.println("Skipping row without the expected date paragraph or link");
                continue;
            }

            String date = dateParagraphs.get(0).text();
            String title = titleNode.text();
            String href = links.get(0).attr("href");
            System.out.println(title + "::" + href + "::" + date);

            Map<String, String> row = new HashMap<>();
            row.put("title", title);
            row.put("url", href);
            row.put("date", date);
            dats.add(row);
        }

        // Report the total only after the rows have been collected.
        System.out.println("总条数：" + dats.size());
        // NOTE(review): the literal 3 presumably is the column count (title, url, date);
        // confirm against ExcelUtils.writeExcel.
        ExcelUtils.writeExcel(dats, 3, OUTPUT_PATH);
    }

    /**
     * Reads the whole stream as UTF-8 text and closes it.
     * Line terminators are normalised to {@code "\n"} and every line, including the
     * last one, is followed by {@code "\n"}.
     *
     * <p>Best effort: if reading or closing fails, the stack trace is printed and
     * whatever was read so far is returned.
     *
     * <p>NOTE(review): UTF-8 is assumed for the response body; confirm against the
     * server's Content-Type header.
     *
     * @param is stream to drain; closed before this method returns
     * @return the decoded content, possibly truncated if an I/O error occurred
     */
    public static String convertStreamToString(InputStream is) {
        StringBuilder sb = new StringBuilder();
        // Explicit charset: the platform default would garble non-ASCII text on
        // JVMs whose default encoding is not UTF-8.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (IOException e) {
            // Deliberately non-fatal: keep the partial content, but make the failure visible.
            e.printStackTrace();
        }
        return sb.toString();
    }
}
